Code
# Set options
knitr:: opts_chunk$ set (
echo = TRUE ,
warning = FALSE ,
message = FALSE ,
fig.align = 'center' ,
fig.retina = 2
)
rm (list= ls ())
library (tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library (ggplot2)
#library(table1)
library (gt)
library (survival)
library (data.table)
library (randomForest)
library (grf)
library (policytree)
library (DiagrammeR)
#library(grid)
#library(forestploter)
#library(randomizr)
# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)
library (forestsearch)
library (weightedsurv)
# Set theme for plots
theme_set (theme_minimal (base_size = 12 ))
Summary
Reproducing main GBSG analysis
Datasetup
Code
df.analysis <- gbsg
df.analysis <- within (df.analysis,{
id <- as.numeric (c (1 : nrow (df.analysis)))
# time to months
time_months <- rfstime/ 30.4375
grade3 <- ifelse (grade== "3" ,1 ,0 )
treat <- hormon
})
confounders.name <- c ("age" ,"meno" ,"size" ,"grade3" ,"nodes" ,"pgr" ,"er" )
outcome.name <- c ("time_months" )
event.name <- c ("status" )
id.name <- c ("id" )
treat.name <- c ("hormon" )
Kaplan-Meier curves and baseline summary
Code
dfcount <- df_counting (
df = df.analysis,
by.risk = 6 ,
tte.name = outcome.name,
event.name = event.name,
treat.name = treat.name
)
plot_weighted_km (dfcount, conf.int = TRUE , show.logrank = TRUE , ymax = 1.05 , xmed.fraction = 0.775 , ymed.offset = 0.125 )
Code
create_summary_table (data = df.analysis, treat_var = treat.name,
table_title = "GBSG Characteristics by Treatment Arm" ,
vars_continuous= c ("age" ,"nodes" ,"size" ,"er" ,"pgr" ),
vars_categorical= c ("grade" ,"grade3" ),
font_size = 12 )
Characteristic
Control (n=440)
Treatment (n=246)
P-value
SMD
age
Mean (SD)
51.1 (10.0)
56.6 (9.4)
<0.001
0.57
nodes
Mean (SD)
4.9 (5.6)
5.1 (5.3)
0.665
0.03
size
Mean (SD)
29.6 (14.4)
28.8 (14.1)
0.470
0.06
er
Mean (SD)
79.7 (124.2)
125.8 (191.1)
<0.001
0.30
pgr
Mean (SD)
102.0 (170.0)
124.3 (249.7)
0.213
0.11
grade
0.273
0.06
1
48 (10.9%)
33 (13.4%)
2
281 (63.9%)
163 (66.3%)
3
111 (25.2%)
50 (20.3%)
grade3
0.174
0.05
0
329 (74.8%)
196 (79.7%)
1
111 (25.2%)
50 (20.3%)
GRF analysis
Code
## GRF
grf_est1 <- grf.subg.harm.survival (data= df.analysis,
confounders.name = confounders.name,
outcome.name= outcome.name, event.name= event.name, id.name= id.name, treat.name= treat.name,
maxdepth = 2 , n.min = 60 , dmin.grf = 12 , frac.tau= 0.6 , details= TRUE )
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
Code
# NOTE : In general for GRF trees
# leaf1 --> recommend control
# leaf2 --> recommend treatment
# Tree depth 1
plot (grf_est1$ tree1,leaf.labels= c ("Control" ,"Treat" ))
Code
# Tree depth 2
plot (grf_est1$ tree2,leaf.labels= c ("Control" ,"Treat" ))
Forestsearch with depth=2 (maxk = 2)
Code
# Setup parallel processing
library (doFuture)
library (doRNG)
registerDoFuture ()
registerDoRNG ()
system.time ({fs <- forestsearch (df.analysis, confounders.name = confounders.name,
outcome.name = "time_months" , treat.name = "hormon" , event.name = "status" , id.name = "id" ,
potentialOutcome.name = NULL ,
df.test = NULL ,
flag_harm.name = NULL ,
hr.threshold = 1.25 , hr.consistency = 1.0 , pconsistency.threshold = 0.90 ,
sg_focus = "maxSG" , max_subgroups_search = 30 ,
showten_subgroups = TRUE , details= TRUE ,
conf_force = NULL ,
cut_type = "default" , use_grf = TRUE , plot.grf = TRUE , use_lasso = FALSE ,
maxk = 2 ,
n.min = 60 , d0.min = 12 , d1.min = 12 ,
plot.sg = TRUE , by.risk = 6 ,
parallel_args = list (plan= "callr" , workers = 30 , show_message = TRUE )
)
})
GRF stage for cut selection with dmin,tau= 12 0.6
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
# of continuous/categorical characteristics 5 2
Continuous characteristics: age size nodes pgr er
Categorical characteristics: meno grade3
Default cuts included (1st 20) age <= mean(age) age <= median(age) age <= qlow(age) age <= qhigh(age) size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr) er <= mean(er) er <= median(er) er <= qlow(er) er <= qhigh(er)
Categorical: meno grade3
Factors per GRF: er <= 0 age <= 50 age <= 43
Initial GRF cuts included er <= 0 age <= 50 age <= 43
===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 25 cut expressions once and caching...
Cut evaluation summary:
Total cuts: 25
Valid cuts: 25
Errors: 0
✓ All 25 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====
# of candidate subgroup factors= 25
[1] "er <= 0" "age <= 50" "age <= 43" "age <= 53.1" "age <= 53"
[6] "age <= 46" "age <= 61" "size <= 29.3" "size <= 25" "size <= 20"
[11] "size <= 35" "nodes <= 5" "nodes <= 3" "nodes <= 1" "nodes <= 7"
[16] "pgr <= 110" "pgr <= 32.5" "pgr <= 7" "pgr <= 131.8" "er <= 96.3"
[21] "er <= 36" "er <= 8" "er <= 114" "meno" "grade3"
Number of factors evaluated= 25
Confounders per grf screening q1 q3 q2 q25 q8 q9 q22 q18 q13 q7 q20 q11 q14 q23 q16 q21 q10 q12 q17 q15 q6 q24 q19 q4 q5
Factors Labels VI(grf)
1 er <= 0 q1 0.1276
3 age <= 43 q3 0.1041
2 age <= 50 q2 0.1004
25 grade3 q25 0.0533
8 size <= 29.3 q8 0.0502
9 size <= 25 q9 0.0440
22 er <= 8 q22 0.0430
18 pgr <= 7 q18 0.0383
13 nodes <= 3 q13 0.0365
7 age <= 61 q7 0.0347
20 er <= 96.3 q20 0.0328
11 size <= 35 q11 0.0327
14 nodes <= 1 q14 0.0308
23 er <= 114 q23 0.0308
16 pgr <= 110 q16 0.0306
21 er <= 36 q21 0.0302
10 size <= 20 q10 0.0286
12 nodes <= 5 q12 0.0280
17 pgr <= 32.5 q17 0.0278
15 nodes <= 7 q15 0.0264
6 age <= 46 q6 0.0189
24 meno q24 0.0168
19 pgr <= 131.8 q19 0.0132
4 age <= 53.1 q4 0.0111
5 age <= 53 q5 0.0091
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 1275
Events criteria: control >= 12 , treatment >= 12
Subgroup search completed in 0.02 minutes
Found 23 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 23
Removed 1 near-duplicate subgroups
Original rows: 23
After removal: 22
# of unique initial candidates: 22
# Restricting to top stop_Kgroups = 30
# of candidates restricted to 'top K': 22
Using parallel processing: callr with 30 workers
*** Not met: Subgroup, % Consistency = {age <= 50} {pgr <= 110} 0.69
*** Not met: Subgroup, % Consistency = !{age <= 43} {age <= 50} 0.8
*** Not met: Subgroup, % Consistency = {age <= 50} {pgr <= 32.5} 0.75
*** Not met: Subgroup, % Consistency = {er <= 8} {pgr <= 7} 0.73
*** Not met: Subgroup, % Consistency = {age <= 50} !{age <= 46} 0.59
*** Not met: Subgroup, % Consistency = {er <= 8} !{nodes <= 3} 0.62
*** Not met: Subgroup, % Consistency = !{size <= 25} {er <= 8} 0.85
*** Not met: Subgroup, % Consistency = {age <= 50} {er <= 8} 0.68
*** Not met: Subgroup, % Consistency = {er <= 8} !{meno} 0.85
*** Not met: Subgroup, % Consistency = {grade3} {er <= 8} 0.77
Consistency met!
# of splits = 1000
**** Subgroup, % Consistency Met= {er <= 0} 0.95
*** Not met: Subgroup, % Consistency = !{size <= 29.3} {er <= 8} 0.89
Consistency met!
# of splits = 1000
**** Subgroup, % Consistency Met= {er <= 0} {pgr <= 32.5} 0.97
*** Not met: Subgroup, % Consistency = {grade3} {pgr <= 7} 0.82
*** Not met: Subgroup, % Consistency = {age <= 50} {pgr <= 7} 0.83
*** Not met: Subgroup, % Consistency = !{size <= 35} {nodes <= 5} 0.56
Consistency met!
# of splits = 1000
**** Subgroup, % Consistency Met= {er <= 0} !{age <= 43} 0.96
Consistency met!
# of splits = 1000
**** Subgroup, % Consistency Met= {er <= 0} {pgr <= 7} 0.93
*** Not met: Subgroup, % Consistency = !{size <= 29.3} !{er <= 114} 0.42
*** Not met: Subgroup, % Consistency = !{size <= 35} !{age <= 53.1} 0.45
Consistency met!
# of splits = 1000
**** Subgroup, % Consistency Met= {er <= 0} {size <= 35} 0.98
Consistency met!
# of splits = 1000
**** Subgroup, % Consistency Met= {er <= 0} !{size <= 20} 0.93
*** Subgroup found: {er <= 0}
% consistency criteria met= 0.95
SG focus= maxSG
Subgroup Consistency Minutes= 0.0831
Subgroup found (FS) with sg_focus='maxSG'
Selected subgroup: {er <= 0}
Minutes forestsearch overall = 0.11
user system elapsed
53.539 2.416 6.843
Code
plan ("sequential" )
# Results for estimation (training) data, which_df = "est" is default
res_tabs <- sg_tables (fs, ndecimals = 3 , which_df = "est" )
res_tabs$ sg10_out
Two-factor subgroups (maxk=2)
{er <= 0}
82
45
16
1.951
0.950
{er <= 0}
{pgr <= 32.5}
75
41
16
2.222
0.970
{er <= 0}
!{age <= 43}
68
38
14
2.164
0.960
{er <= 0}
{pgr <= 7}
64
34
13
1.992
0.930
{er <= 0}
{size <= 35}
61
34
15
2.537
0.980
{er <= 0}
!{size <= 20}
61
35
12
2.054
0.930
Search Configuration: Single-factor candidates (L) = 50; Maximum combinations evaluated = 1,275; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 23; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
Training data estimates
ITT
686 (100.0%)
246 (35.9%)
299 (43.6%)
66.3
50.2
7.8
0.69 (0.54, 0.89)
Questionable
82 (12.0%)
26 (31.7%)
45 (54.9%)
22.9
43.7
-14
1.95 (1.04, 3.67)
Recommend
604 (88.0%)
220 (36.4%)
254 (42.1%)
66.7
52.6
9.3
0.61 (0.47, 0.80)
Bootstrap Inference
Code
#output_dir <- "dev/vignettes-working/applications/gbsg/results"
output_dir <- "results/"
save_results <- dir.exists (output_dir)
# patchhwork needed for a combined bootstrap plot (otherwise if not avaialable will not produce)
library (patchwork)
# Number of bootstrap samples
NB <- 10
system.time ({fs_bc <- forestsearch_bootstrap_dofuture (
fs.est = fs,
nb_boots = NB,
show_three = FALSE ,
details = TRUE )
})
Ystar matrix generated should be 'boots x N': 10 x 686
=== Bootstrap Analysis Complete ===
Success rate: 90.0% (9/10)
H (Questionable) Estimates:
Unadjusted: 1.95 (1.04,3.67)
Bias-corrected: 1.85 (0.83,4.15)
Hc (Recommend) Estimates:
Unadjusted: 0.61 (0.47,0.8)
Bias-corrected: 0.62 (0.02,16.94)
===================================
user system elapsed
320.551 7.017 49.942
Code
plan ("sequential" )
if (save_results) {
filename <- file.path (output_dir,
paste0 ("gbsg_k2_B=" ,
format (NB),
".RData" ))
save (df.analysis, fs, fs_bc, file = filename)
cat (" \n Results saved to:" , filename, " \n " )
}
Results saved to: results//gbsg_k2_B=10.RData
Diagnostics and Summaries
Code
#load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")
output_dir <- "results/"
#NB <- 1000
load_results <- dir.exists (output_dir)
if (load_results){
filename <- file.path (output_dir,
paste0 ("gbsg_k2_B=" ,
format (NB),".RData" ))
load (file = filename)
}
summaries <- summarize_bootstrap_results (
sgharm = fs$ sg.harm,
boot_results = fs_bc,
create_plots = TRUE ,
est.scale = "hr"
)
===============================================================
BOOTSTRAP ANALYSIS SUMMARY
===============================================================
BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
Total iterations: 10
Successful subgroup ID: 9 (90.0%)
Failed to find subgroup: 1 (10.0%)
TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
Total bootstrap time: 0.81 minutes (0.01 hours)
Average per iteration: 0.08 min (4.9 sec)
Projected for 1000 boots: 81.32 minutes (1.36 hours)
Per-iteration timing:
Mean: 0.50 min (29.9 sec)
Median: 0.44 min (26.7 sec)
Std Dev: 0.26 minutes
Range: [0.17, 0.79] minutes
IQR: [0.31, 0.77] minutes
ForestSearch timing (successful iterations only):
Iterations with FS: 10 (100.0%)
Mean FS time: 0.50 min (29.8 sec)
Median FS time: 0.44 min (26.7 sec)
Total FS time: 4.97 minutes
FS time % of total: 611.7%
Overhead timing (Cox models, bias correction, etc.):
Mean overhead: 0.00 min (0.0 sec)
Median overhead: 0.00 min (0.0 sec)
Total overhead: 0.00 minutes
Overhead % of total: 0.4%
PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
Performance rating: ✓✓✓ Excellent
Average iteration speed: 4.9 seconds
===============================================================
Code
sg_tab <- summaries$ table
sg_tab
Bootstrap bias-corrected estimates (10 iterations)
N
NT
Events
MedT
MedC
RMSTd
HR (95% CI)†
HR‡ (95% CI)
Qstnbl
82 (12.0%)
26 (31.7%)
45 (54.9%)
22.9
43.7
-14
1.95 (1.04, 3.67)
1.85 (0.83,4.15)
Recmnd
604 (88.0%)
220 (36.4%)
254 (42.1%)
66.7
52.6
9.3
0.61 (0.47, 0.80)
0.62 (0.02,16.94)
Note : Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 90.0% of bootstrap samples.
Code
event_summary <- summarize_bootstrap_events (fs_bc, threshold = 12 )
=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 10
Event threshold: <12 events
ORIGINAL Subgroup H on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
ORIGINAL Subgroup Hc on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
NEW Subgroups found: 9 (90.0%)
NEW Subgroup H* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
NEW Subgroup Hc* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
Code
summaries$ diagnostics_table_gt
Analysis of 10 bootstrap iterations
Success Rate
Total iterations
10
Successful subgroup ID
9 (90.0%)
Failed to find subgroup
1 (10.0%)
Success rating
Excellent ✓✓✓
Subgroup H (Questionable)
Unadjusted estimate
1.95 (1.04, 3.67)
Bias-corrected estimate
1.85 (0.83, 4.15)
Bias correction impact
5.0%
CI width change
2.64 -> 3.32
Subgroup Hc (Recommend)
Unadjusted estimate
0.61 (0.47, 0.80)
Bias-corrected estimate
0.62 (0.02, 16.94)
Bias correction impact
0.7%
CI width change
0.33 -> 16.92
Bootstrap Quality: H
Valid iterations
9
Mean (SD)
0.62 (0.25)
Coefficient of variation
41.2%
Skewness
-0.47
Bootstrap Quality: Hc
Valid iterations
9
Mean (SD)
-0.48 (0.21)
Coefficient of variation
43.1%
Skewness
-0.38
Search Performance
Mean max HR found
2.94 (1.22)
Mean factors evaluated
47.8
Mean combinations tried
1171
Proportion at maxk
--
Interpretation Guide:
✓ Excellent stability : Subgroup is consistently identified across bootstrap samples.
⚠ High variability : Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.
Code
summaries$ subgroup_summary$ original_agreement
Metric Value
<char> <char>
1: Total bootstrap iterations 10
2: Successful iterations 9
3: Failed iterations (no subgroup) 1
4: Exact match with original 0 (0.0%)
5: Different from original 9 (100.0%)
Code
summaries$ subgroup_summary$ factor_presence
Rank Factor Count Percent
2 1 er 5 55.55556
5 2 pgr 4 44.44444
6 3 size 4 44.44444
1 4 age 2 22.22222
4 5 nodes 2 22.22222
3 6 grade3 1 11.11111
Code
summaries$ subgroup_summary$ factor_presence_specific
Rank Base_Factor Factor_Definition Count Percent
1 1 age !{age <= 44} 1 11.11111
9 2 age {age <= 52.8} 1 11.11111
11 3 er {er <= 8} 2 22.22222
2 4 er !{er <= 99.2} 1 11.11111
3 5 er !{er <= 99} 1 11.11111
10 6 er {er <= 120} 1 11.11111
12 7 grade3 {grade3} 1 11.11111
4 8 nodes !{nodes <= 1} 1 11.11111
5 9 nodes !{nodes <= 5.4} 1 11.11111
13 10 pgr {pgr <= 112.3} 1 11.11111
14 11 pgr {pgr <= 134.2} 1 11.11111
15 12 pgr {pgr <= 7} 1 11.11111
16 13 pgr {pgr <= 8} 1 11.11111
6 14 size !{size <= 25} 2 22.22222
7 15 size !{size <= 29.6} 1 11.11111
8 16 size !{size <= 29.8} 1 11.11111
Forest Search n-fold cross-validation
Code
output_dir <- "results/"
load_results <- dir.exists (output_dir)
#NB <- 1000
if (load_results){
filename <- file.path (output_dir,
paste0 ("gbsg_k2_B=" ,
format (NB),".RData" ))
load (file = filename)
}
# Kfolds = n (default to n-fold cross-validations)
fs_OOB <- NULL
fs_OOB <- forestsearch_Kfold (fs.est = fs, details = TRUE ,
parallel_args = list (plan = "callr" , workers = 36 , show_message = TRUE ))
Cross-validation setup:
- Observations: 686
- Folds: 686
- Fold sizes (range): 1-1
Cross-validation complete:
- Time: 36.52 minutes
- Subgroup found in 98.4 % of folds
Any found: 0.983965
Exact match: 0.9635569
At least 1 match: 0.9635569
Cov 1 any: 0.9810496
Cov 2 any: NaN
Cov 1 and 2 any: 0
Cov 1 exact: 0.9635569
Cov 2 exact: NaN
Agreement (sens, ppv) in H and Hc: 0.8292683 0.9917219 0.9315068 0.9771615
Code
# Reset workers to single
plan (sequential)
summary_OOB <- forestsearch_KfoldOut (res= fs_OOB, details= TRUE , outall= TRUE )
Any found: 0.983965
Exact match: 0.9635569
At least 1 match: 0.9635569
Cov 1 any: 0.9810496
Cov 2 any: NaN
Cov 1 and 2 any: 0
Cov 1 exact: 0.9635569
Cov 2 exact: NaN
Agreement (sens, ppv) in H and Hc: 0.8292683 0.9917219 0.9315068 0.9771615
Subgroup n n1 m1 m0 RMST
Overall "ITT" "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8"
FA_0 "Not recommend" "82 (12.0%)" "26 (31.7%)" "22.9" "43.7" "-14"
KfA_0 "Not recommend" "73 (10.6%)" "21 (28.8%)" NA "42.9" "11"
FA_1 "Recommend" "604 (88.0%)" "220 (36.4%)" "66.7" "52.6" "9.3"
KfA_1 "Recommend" "613 (89.4%)" "225 (36.7%)" "66.2" "55" "6.4"
Hazard ratio
Overall "0.69 (0.54, 0.89)"
FA_0 "1.95 (1.04, 3.67)"
KfA_0 "0.53 (0.22, 1.25)"
FA_1 "0.61 (0.47, 0.80)"
KfA_1 "0.72 (0.56, 0.93)"
Code
table (summary_OOB$ SGs_found[,1 ])
!{age <= 43} !{size <= 25} !{size <= 29.3} {er <= 0} {er <= 8}
2 4 2 661 6
Code
table (summary_OOB$ SGs_found[,2 ])
!{meno} !{size <= 25} !{size <= 29.3} {age <= 47} {er <= 8}
3 1 2 2 6
Code
Ksims <- 5
fs_ten <- forestsearch_tenfold (fs.est = fs, sims = Ksims, Kfolds = 10 , details= TRUE ,
parallel_args = list (plan = "callr" , workers = 36 , show_message = TRUE ))
Starting repeated K-fold cross-validation:
- Simulations: 5
- Folds per simulation: 10
- Workers: 13
Repeated K-fold CV complete:
- Time: 3.67 minutes
- Successful simulations: 5 / 5
- Projected hours per 100 sims: 1.22
Code
# Reset workers to single
plan (sequential)
print (fs_ten$ find_summary)
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact
0.8 0.3 0.3 0.6 NA 0.0 0.3
Cov2 exact
NA
Code
print (fs_ten$ sens_summary)
sens_H sens_Hc ppv_H ppv_Hc
0.5000000 0.9370861 0.5189873 0.9324547
Code
print (head (fs_ten$ sens_out))
sens_H sens_Hc ppv_H ppv_Hc
[1,] 0.5731707 0.9966887 0.9591837 0.9450549
[2,] 0.4024390 0.8956954 0.3437500 0.9169492
[3,] 0.5000000 0.9370861 0.5189873 0.9324547
[4,] 0.5487805 0.9420530 0.5625000 0.9389439
[5,] 0.2682927 0.9238411 0.3235294 0.9029126
Code
print (head (fs_ten$ find_out))
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.6 0.5 0.5 0.6 NaN 0 0.5 NaN
[2,] 0.8 0.2 0.2 0.3 NaN 0 0.2 NaN
[3,] 0.8 0.3 0.3 0.6 NaN 0 0.3 NaN
[4,] 0.9 0.3 0.3 0.8 NaN 0 0.3 NaN
[5,] 0.7 0.1 0.1 0.4 NaN 0 0.1 NaN
Code
# Save all results
output_dir <- "results/"
save_results <- dir.exists (output_dir)
if (save_results) {
filename <- file.path (output_dir,
paste0 ("gbsg_k2_CV=" ,
format (Ksims),
".RData" ))
save (df.analysis, fs, fs_bc, fs_ten, fs_OOB, file = filename)
cat (" \n Results saved to:" , filename, " \n " )
}
Results saved to: results//gbsg_k2_CV=5.RData
Code
#Ksims <- 1000
output_dir <- "results/"
load_results <- dir.exists (output_dir)
if (load_results){
filename <- file.path (output_dir,
paste0 ("gbsg_k2_CV=" ,
format (Ksims),".RData" ))
load (file = filename)
}
#' # Define subgroups to display
subgroups <- list (
age_gt65 = list (
subset_expr = "age > 65" ,
name = "age > 65" ,
type = "reference"
),
age_lt65 = list (
subset_expr = "age <= 65" ,
name = "age <= 65" ,
type = "reference"
),
pgr_positive = list (
subset_expr = "pgr > 0" ,
name = "pgr > 0" ,
type = "reference"
),
pgr_negative = list (
subset_expr = "pgr <= 0" ,
name = "pgr <= 0" ,
type = "reference"
)
)
# Create the forest plot
result <- plot_subgroup_results_forestplot (
fs_results = list (fs.est = fs, fs_bc = fs_bc, fs_OOB = NULL , fs_kfold = fs_ten),
df_analysis = df.analysis,
subgroup_list = subgroups,
outcome.name = "time_months" ,
event.name = "status" ,
treat.name = "hormon" ,
E.name = "Hormon" ,
C.name = "CT"
)
# Display the plot
plot (result$ plot)